import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import json
from itertools import cycle

# Load the listing table; this step only uses its 'neighbourhood' column.
df = pd.read_csv('F:/OneDrive/MYW/data_science/airbnb-SH-listings.csv/listings-locations.csv')

# Load the Shanghai GeoJSON file into a plain dict.
with open('F:/OneDrive/MYW/data_science/geojson-china/data-master/json/geo/china/province-city/shanghai.geojson', encoding='utf-8') as f:
    districts_map = json.load(f)
# Notebook-style inspection of the loaded features; it has no effect when the
# file is run as a script.
districts_map['features']

# Palette of CSS colour names. The literal mixes ", " and "," separators, so
# each piece is stripped to avoid names such as " antiquewhite".
col = [
    name.strip()
    for name in "aliceblue, antiquewhite, aqua, aquamarine, azure,beige, bisque, black, blanchedalmond, blue,blueviolet, brown, burlywood, cadetblue,chartreuse, chocolate".split(",")
]
nei = set(df['neighbourhood'].values.tolist())
# Map each neighbourhood to a colour name.
# - sorted(..., key=str) makes the pairing reproducible (set order is not) and
#   tolerates a NaN entry among the strings.
# - cycle() reuses colours when there are more neighbourhoods than names, so
#   every neighbourhood gets an entry instead of failing on lookup.
k_v = dict(zip(sorted(nei, key=str), cycle(col)))
df['col'] = df['neighbourhood'].map(k_v)
import os

# Scatter every listing on a Mapbox map, one colour per neighbourhood.
fig = px.scatter_mapbox(
    df,
    lon='longitude',
    lat='latitude',
    # Colour by the neighbourhood and give Plotly the neighbourhood -> colour
    # mapping explicitly. Passing the column of colour *names* as `color`
    # only used those names as category labels; the colours themselves were
    # taken from Plotly's default sequence.
    color='neighbourhood',
    color_discrete_map=k_v,
    hover_name='name',
    hover_data=None,
)
fig.update_layout(
    mapbox={
        # NOTE(review): this token is committed in source. MAPBOX_ACCESS_TOKEN
        # overrides it when set; consider rotating the token and removing the
        # literal fallback.
        'accesstoken': os.environ.get(
            'MAPBOX_ACCESS_TOKEN',
            'pk.eyJ1IjoicGlnZ3lzcDExMDIiLCJhIjoiY2t4ajc1YzlhMHJvcjJ2cXdhb3I5c3JwMiJ9.8Y2WR2f5TrE5DqEyO-rt3g',
        ),
        'center': {'lat': 31.224361, 'lon': 121.469170},
        'zoom': 8,
    },
    # Let the map fill the whole figure area.
    margin={'l': 0, 'r': 0, 't': 0, 'b': 0},
)
# Geo-layout settings kept as a plain dict.
# The original statement ended with a trailing comma, which silently turned
# `geo` into a 1-tuple wrapping the dict.
# NOTE(review): nothing in the visible code passes `geo` to a figure — confirm
# whether it is still needed.
geo = dict(
    scope='asia',
    showland=True,
    landcolor='rgb(212,212,212)',
    subunitcolor='rgb(255,255,255)',
    countrycolor='rgb(255,255,255)',
    showlakes=True,
    showcountries=True,
    resolution=50,
    projection=dict(
        type='conic conformal',
        rotation_lon=-100,
    ),
    lonaxis=dict(
        showgrid=True,
        gridwidth=0.5,
        range=[120.8, 122],
        dtick=0.1,
    ),
    lataxis=dict(
        showgrid=True,
        gridwidth=0.5,
        range=[30.5, 32],
        dtick=0.1,
    ),
)
# Display the Plotly figure held in `fig`.
fig.show()
pip install jieba
Requirement already satisfied: jieba in e:\anaconda\lib\site-packages (0.42.1) Note: you may need to restart the kernel to use updated packages.
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from nltk import word_tokenize
from collections import Counter
import re
import jieba
from wordcloud import WordCloud
# Two empty strings; nothing in the visible code fills them yet.
china, english = "", ""
# Matplotlib text setup:
# - SimHei as the sans-serif font so Chinese labels render correctly
# - unicode minus disabled so minus signs render correctly
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# Reload the listing table and draw a pie chart of listings per neighbourhood.
df = pd.read_csv('F:/OneDrive/MYW/data_science/airbnb-SH-listings.csv/listings-locations.csv')
# Keep only the text before the first "/" and trim surrounding whitespace.
# The .str accessor passes missing values through as NaN, whereas calling
# x.split(...) in a lambda raises AttributeError on a NaN (float) entry.
df['neighbourhood'] = df['neighbourhood'].str.split("/").str[0].str.strip()
# Number of non-null host_id values per neighbourhood.
df1 = df.groupby('neighbourhood')['host_id'].count()
plt.pie(x=df1.values, labels=df1.index.values)
# plt.show() is intentionally not called here.
([<matplotlib.patches.Wedge at 0x1c61963a9d0>, <matplotlib.patches.Wedge at 0x1c61963aeb0>, <matplotlib.patches.Wedge at 0x1c619648370>, <matplotlib.patches.Wedge at 0x1c6196487f0>, <matplotlib.patches.Wedge at 0x1c619648c70>, <matplotlib.patches.Wedge at 0x1c61964d130>, <matplotlib.patches.Wedge at 0x1c61964d5b0>, <matplotlib.patches.Wedge at 0x1c61964da30>, <matplotlib.patches.Wedge at 0x1c61964deb0>, <matplotlib.patches.Wedge at 0x1c619651370>, <matplotlib.patches.Wedge at 0x1c61a054970>, <matplotlib.patches.Wedge at 0x1c619651c40>, <matplotlib.patches.Wedge at 0x1c61961c100>, <matplotlib.patches.Wedge at 0x1c61961c580>, <matplotlib.patches.Wedge at 0x1c61961ca00>, <matplotlib.patches.Wedge at 0x1c61961ce80>], [Text(1.0962746501780334, 0.0904538079741836, '嘉定区'), Text(1.0789790127904655, 0.2140193681836125, '奉贤区'), Text(1.0548455475098957, 0.311930875192804, '宝山区'), Text(0.9312214730007253, 0.5855139351222647, '崇明区'), Text(0.5444574577351535, 0.9558065058978066, '徐汇区'), Text(0.20406727632042368, 1.0809054291357612, '普陀区'), Text(0.0720124213695824, 1.0976402922490092, '杨浦区'), Text(-0.1341477258602501, 1.0917895345012807, '松江区'), Text(-1.091737001258064, 0.13457458929549085, '浦东新区'), Text(-0.43463450461605924, -1.0104913890762024, '虹口区'), Text(-0.33172836941676404, -1.048788009525326, '金山区'), Text(-0.21239206368419905, -1.0793005194495031, '长宁区'), Text(0.0618330366225734, -1.0982607502692754, '闵行区'), Text(0.37316080364225124, -1.0347709962233524, '青浦区'), Text(0.6799353825369264, -0.864689467713331, '静安区'), Text(1.0301725734554428, -0.3856740448878954, '黄浦区')])
# Write the current figure to pie.png at 300 dpi.
plt.savefig("pie.png", dpi=300)
# Clear the whole figure rather than only the current axes, so the next plot
# starts on fresh axes. NOTE(review): plt.cla() is believed to keep axes-level
# state set by pie() (equal aspect, hidden frame) — confirm for your
# matplotlib version.
plt.clf()
# Bar chart of the number of listings per room type, with a count on each bar.
df2 = df.groupby("room_type")['host_id'].count()
p1 = plt.bar(x=df2.index, height=df2.values)
if hasattr(plt, "bar_label"):
    # label_type must be the string 'edge'; the bare name `edge` is undefined.
    plt.bar_label(p1, label_type='edge')
else:
    # Older matplotlib releases have no pyplot.bar_label (it raises
    # AttributeError), so place the count above each bar by hand.
    for rect in p1:
        height = rect.get_height()
        plt.text(
            rect.get_x() + rect.get_width() / 2,
            height,
            '%g' % height,
            ha='center',
            va='bottom',
        )
plt.xlabel("房屋类型")
plt.ylabel("计数")
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-13-032d76723afb> in <module> 3 df2 = df.groupby("room_type")['host_id'].count() 4 p1 = plt.bar(x=df2.index,height=df2.values) ----> 5 plt.bar_label(p1, label_type= edge) 6 plt.xlabel("房屋类型") 7 plt.ylabel("计数") AttributeError: module 'matplotlib.pyplot' has no attribute 'bar_label'
# Write the current figure to bar.png at 300 dpi.
plt.savefig("bar.png", dpi=300)
# Clear the whole figure rather than only the current axes, so the next plot
# starts on fresh axes instead of reusing the bar chart's axes.
plt.clf()
# Scatter plot of review count against how long each host has been registered.
df3 = pd.read_csv("F:/OneDrive/MYW/data_science/airbnb-SH-listings.csv/listings.csv")
# Take an explicit copy so the column assignments below modify an independent
# frame and do not trigger SettingWithCopyWarning.
df4 = df3[['host_since', 'number_of_reviews']].copy()
df4["host_since"] = pd.to_datetime(df4['host_since'])
# Time elapsed between each host's registration and the most recent one.
# Series.max() skips missing dates; the builtin max() compares element by
# element and gives an order-dependent result when NaT is present.
df4['day_count'] = df4['host_since'].max() - df4['host_since']
print(df4['day_count'].dt.days)
plt.scatter(x=df4['day_count'].dt.days, y=df4['number_of_reviews'])
plt.xlabel("注册时间")
plt.ylabel("评论数")
0 4232.0
1 3790.0
2 3790.0
3 3790.0
4 3790.0
...
28120 830.0
28121 2573.0
28122 2573.0
28123 547.0
28124 0.0
Name: day_count, Length: 28125, dtype: float64
<ipython-input-14-e20ed5bfd76c>:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df4["host_since"] = pd.to_datetime(df4['host_since']) <ipython-input-14-e20ed5bfd76c>:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df4['day_count'] = max(df4['host_since']) - df4['host_since']
Text(0, 0.5, '评论数')